Return Predictability with Agnostic Fundamental Analysis¶

Roy Gabriel, Michele Orlandi, Ryan Pate, Michael Smith¶

MGT6078 Fall 2022¶

1. Setup¶

In [ ]:
import os
import sys
sys.path.append(os.getcwd())
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression, Lasso, LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import gc
import warnings
In [ ]:
url = 'https://www.dropbox.com/s/iasfmrfdzafjkyq/student_data.csv?dl=1'
In [ ]:
# Make sure the cyclic garbage collector is switched on for the session.
gc.enable()
# Silence every warning raised for the rest of the session.
# NOTE(review): this blanket filter also hides library deprecation warnings
# (e.g. from scikit-learn) — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

1.1 Helper Functions¶

In [ ]:
def get_scatter(xval: pd.Series, yval: pd.Series, n: str):
    """Build a Plotly scatter trace.

    Parameters
    ----------
    xval : values for the x axis.
    yval : values for the y axis.
    n : legend name of the trace.

    Returns
    -------
    The ``go.Scatter`` trace (not a figure), ready to be added to a figure.
    """
    return go.Scatter(x=xval, y=yval, name=n)
In [ ]:
def time_series(
    df: pd.DataFrame,
    ttl: str,
    title_suffix: str = ' Monthly Cumulative Portfolio Value',
    yaxis_title: str = 'Portfolio Value ($1 on 1987-03-31)',
):
    """Plot every column of ``df`` as a line against the frame's index.

    Parameters
    ----------
    df : frame whose index supplies the x axis and whose columns each
        become one trace (the column name is used as the legend name).
    ttl : leading part of the figure title.
    title_suffix : text appended to ``ttl`` to form the title. The default
        keeps the original cumulative-value wording; pass another suffix
        (or ``''``) when plotting something else, e.g. drawdowns.
    yaxis_title : label of the y axis. The default keeps the original
        portfolio-value label.

    Returns
    -------
    The assembled ``go.Figure``.
    """
    fig = go.Figure()
    for col in df.columns:
        fig.add_trace(get_scatter(df.index, df[col], col))
    fig.update_layout(
        title=ttl + title_suffix,
        xaxis_title='Date',
        yaxis_title=yaxis_title,
    )
    return fig
In [ ]:
def plot_distribution(data: list, labels: list, colors: list, title: str):
    """Draw overlaid distribution plots (histogram + curve, no rug).

    Parameters
    ----------
    data : one array of observations per group.
    labels : legend label of each group, in the same order as ``data``.
    colors : colour of each group, in the same order as ``data``.
    title : text placed in front of ' Distribution' in the figure title.

    Returns
    -------
    The figure produced by ``ff.create_distplot``.
    """
    # every group uses the same histogram bin width
    bin_widths = [0.05] * len(data)
    fig = ff.create_distplot(
        data,
        labels,
        colors=colors,
        bin_size=bin_widths,
        show_curve=True,
        show_rug=False,
    )
    fig.update(layout_title_text='{} Distribution'.format(title))
    return fig

1.2 Clean Data¶

In [ ]:
# Load the monthly file from Dropbox and discard identifier columns that are
# not used anywhere below.
msf = pd.read_csv(url).drop(columns=['PERMNO', 'gvkey', 'COMNAM', 'TICKER', 'SICCD'])
# Drop the first remaining column by position and keep an independent copy.
msf = msf.iloc[:, 1:].copy()
gc.collect()
# Parse the YYYYMMDD date columns into datetime objects.
for _date_col in ('date', 'nextmonth'):
    msf[_date_col] = pd.to_datetime(msf[_date_col], format='%Y%m%d')
# Market value of each firm-month: price times shares outstanding.
# NOTE(review): if PRC can be negative in this extract, mktval would be
# negative too — confirm the input has been cleaned.
msf['mktval'] = msf['PRC'] * msf['SHROUT']
In [ ]:
# Name of the regression target: the last column of msf ('mktval', appended
# last in the cleaning cell).
prediction_name = msf.columns[-1]
# Names of the explanatory variables: every column from position 7 up to,
# but excluding, the target column. This relies on the column order of msf.
predictor_names = msf.columns[7:-1].tolist()
In [ ]:
msf.head()
Out[ ]:
date CUSIP PRC SHROUT RET nextmonth next_Ret atq dvpq seqq ... doq_MA4 nopiq_MA4 ibq_MA4 txtq_MA4 niq_MA4 cheq_MA4 saleq_MA4 dvy_MA4 piq_MA4 mktval
0 1987-03-31 00036110 33.50 9099.0 0.107438 1987-04-30 -0.111940 228.106 0.000 130.427 ... 0.0 0.44225 3.56150 2.85750 3.56150 3.78525 70.67700 2.58625 6.41900 304816.5
1 1987-03-31 10304310 38.25 9158.0 0.145522 1987-04-30 -0.049020 163.403 0.000 112.860 ... 0.0 0.66500 5.48775 5.37000 5.48775 26.73400 48.04150 2.54975 11.11825 350293.5
2 1987-03-31 89051610 61.25 4418.0 -0.020349 1987-04-30 -0.089796 79.886 0.000 64.583 ... 0.0 0.37900 3.20050 3.22475 3.20050 25.79550 27.85075 1.06600 6.42525 270602.5
3 1987-03-31 89109210 31.00 6633.0 -0.038610 1987-04-30 -0.032258 261.411 0.296 74.113 ... 0.0 -0.10925 4.20075 2.67900 4.20075 7.61300 112.99875 2.64300 6.87975 205623.0
4 1987-03-31 89190610 26.00 15916.0 -0.223881 1987-04-30 -0.028846 27.390 0.000 24.649 ... 0.0 0.06650 1.30950 1.29375 1.30950 4.86050 9.08825 0.00000 2.60325 413816.0

5 rows × 35 columns

2. Model 1 - OLS¶

2.1 Helper Functions¶

In [ ]:
def scale_data(df: pd.DataFrame):
    """Standardise the columns of ``df`` with a freshly fitted ``StandardScaler``.

    Returns whatever ``StandardScaler.fit_transform`` yields for ``df``.
    """
    return StandardScaler().fit_transform(df)
In [ ]:
def get_regression(df: pd.DataFrame, predictors: list, prediction: str):
    """Fit an OLS regression and return its in-sample fitted values.

    Parameters
    ----------
    df : frame holding both the explanatory and the target columns.
    predictors : names of the explanatory columns.
    prediction : name of the target column.

    Returns
    -------
    The model's predictions for the same rows it was fitted on, in row order.
    """
    features = df.loc[:, predictors]
    target = df.loc[:, prediction]
    # the predictors are used unscaled
    model = LinearRegression().fit(features, target)
    return model.predict(features)

2.2 Mispricing Signal¶

The mispricing signal is calculated as: $$ M_{j,t} = {{FairValuePrediction_{j,t} - MarketValue_{j,t}} \over MarketValue_{j,t}} $$ Where:

  • $FairValuePrediction_{j,t}$ is defined as firm $j$'s Market Value on month $t$ as predicted by a simple OLS regression
  • $MarketValue_{j,t}$ is defined as firm $j$'s Market Value on month $t$ calculated as $PRC * SHROUT$
  • $M_{j,t}$ is the mispricing signal for firm $j$ on month $t$
In [ ]:
# get monthly fair value for each firm through simple OLS
# Each month's fitted values are labelled with that month's own row index, so
# a later `msf[...] = linear_fair_value['linear_fvp']` aligns by row label and
# does not depend on msf being sorted by date. The frame holds only the
# 'linear_fvp' column, indexed like msf.
linear_fair_value = pd.concat(
    [
        pd.Series(
            get_regression(month_df, predictor_names, prediction_name),
            index=month_df.index,
            name='linear_fvp'
        )
        for _, month_df in msf.groupby('date')
    ]
).to_frame(name='linear_fvp')
In [ ]:
# attach the OLS fair-value predictions to the main frame (aligned on the row index)
msf['linear_fvp'] = linear_fair_value['linear_fvp']
# mispricing signal: (fair value - market value) / market value
msf['linear_sig'] = msf['linear_fvp'].sub(msf['mktval']).div(msf['mktval'])

3. Model 2 - OLS Post-LASSO¶

3.1 Helper Functions¶

In [ ]:
# run LASSO Regression to extract relevant features
def feature_selection(df: pd.DataFrame, predictors: list, prediction):
    """Select predictors with a cross-validated LASSO.

    A 10-fold ``LassoCV`` is fitted on ``df[predictors]`` against
    ``df[prediction]`` to choose the penalty ``alpha``. A plain ``Lasso`` is
    then fitted with that penalty, and the names of the predictors whose
    coefficient is strictly positive are returned.

    NOTE(review): predictors with a negative coefficient are discarded along
    with the zeroed ones — confirm ``> 0`` is intended rather than ``!= 0``.
    NOTE(review): ``normalize=True`` is deprecated in newer scikit-learn
    releases — verify against the version this notebook runs on.

    Parameters
    ----------
    df : frame holding both the explanatory and the target columns.
    predictors : names of the candidate explanatory columns.
    prediction : name of the target column.

    Returns
    -------
    List with the names of the selected predictors (possibly empty).
    """
    features = df.loc[:, predictors]
    target = df.loc[:, prediction]

    # cross-validation picks the optimal penalty
    cv_model = LassoCV(alphas=None, cv=10, max_iter=10000, normalize=True)
    cv_model.fit(features, target)

    # refit a single LASSO at that penalty
    final_model = Lasso(max_iter=10000, normalize=True)
    final_model.alpha = cv_model.alpha_
    final_model.fit(features, target)

    # keep the predictors with a strictly positive coefficient
    keep_mask = final_model.coef_ > 0
    return list(np.array(predictors)[keep_mask])
In [ ]:
# run OLS post-LASSO feature selection
def ols_post_lasso(df: pd.DataFrame, predictors: list, prediction: str):
    """Return OLS fitted values using only the LASSO-selected predictors.

    ``feature_selection`` picks the subset of ``predictors``; ``get_regression``
    is then fitted on that subset and its in-sample predictions are returned.
    """
    return get_regression(
        df,
        feature_selection(df, predictors, prediction),
        prediction,
    )

3.2 Mispricing Signal¶

The mispricing signal is calculated as before, except that the relevant explanatory features are first selected with a Least Absolute Shrinkage and Selection Operator (LASSO).

  • First, fit a cross-validated LASSO to each month's data to find the optimal $\ell_1$ penalty $\alpha$
  • Then fit the data to the LASSO Regressor with the optimal $\alpha$
  • Select the most relevant coefficients: $\beta_i > 0$
  • Run OLS with only selected explanatory variables to predict fair values
In [ ]:
# calculate predicted fair value with post-LASSO regression
# Each month's fitted values are labelled with that month's own row index, so
# a later `msf[...] = lasso_fair_value['post_lasso_fvp']` aligns by row label
# and does not depend on msf being sorted by date. The frame holds only the
# 'post_lasso_fvp' column, indexed like msf.
lasso_fair_value = pd.concat(
    [
        pd.Series(
            ols_post_lasso(month_df, predictor_names, prediction_name),
            index=month_df.index,
            name='post_lasso_fvp'
        )
        for _, month_df in msf.groupby('date')
    ]
).to_frame(name='post_lasso_fvp')
In [ ]:
# attach the post-LASSO fair-value predictions (aligned on the row index)
msf['lasso_fvp'] = lasso_fair_value['post_lasso_fvp']
# OLS post-LASSO mispricing signal: (fair value - market value) / market value
msf['lasso_sig'] = msf['lasso_fvp'].sub(msf['mktval']).div(msf['mktval'])

4. Model 3 - Random Forest Regressor¶

4.1 Helper Functions¶

In [ ]:
def get_random_forest(df: pd.DataFrame, predictors: list, prediction: str):
    """Fit a random-forest regressor and return its in-sample predictions.

    The predictors are standardised with ``scale_data`` before fitting. The
    forest uses 1000 trees, a fixed ``random_state`` of 42, at least 20
    samples per leaf, a maximum depth of 100 and all available cores.

    Parameters
    ----------
    df : frame holding both the explanatory and the target columns.
    predictors : names of the explanatory columns.
    prediction : name of the target column.

    Returns
    -------
    Predictions for the same rows the forest was fitted on, in row order.
    """
    target = df.loc[:, prediction]
    features = scale_data(df.loc[:, predictors])  # NOTE: Can possibly remove

    forest = RandomForestRegressor(
        n_estimators=1000,
        random_state=42,
        min_samples_leaf=20,
        max_depth=100,
        n_jobs=-1,
    )
    forest.fit(features, target)
    return forest.predict(features)

4.2 Mispricing Signal¶

In [ ]:
# calculate predicted fair value with Random Forest regression
# Each month's fitted values are labelled with that month's own row index, so
# a later `msf[...] = rf_fair_value['rf_fvp']` aligns by row label and does
# not depend on msf being sorted by date. The frame holds only the 'rf_fvp'
# column, indexed like msf.
rf_fair_value = pd.concat(
    [
        pd.Series(
            get_random_forest(month_df, predictor_names, prediction_name),
            index=month_df.index,
            name='rf_fvp'
        )
        for _, month_df in msf.groupby('date')
    ]
).to_frame(name='rf_fvp')
In [ ]:
# attach the random-forest fair-value predictions (aligned on the row index)
msf['rf_fvp'] = rf_fair_value['rf_fvp']
# random-forest mispricing signal: (fair value - market value) / market value
msf['rf_sig'] = msf['rf_fvp'].sub(msf['mktval']).div(msf['mktval'])

5. Portfolio Construction¶

5.1 Sorting Signal Quintiles¶

In [ ]:
# rank stocks in quintiles based on the signal quantiles
# Within each month the signal is ranked (ties broken by order of appearance)
# and the ranks are cut into five equal-sized buckets labelled Q1..Q5.
_quintile_labels = ['Q{}'.format(i) for i in range(1, 6)]
for _model in ('linear', 'lasso', 'rf'):
    msf['{}_quintiles'.format(_model)] = msf.groupby(
        'date', group_keys=False
    )['{}_sig'.format(_model)].apply(
        lambda x: pd.qcut(x.rank(method='first'), 5, labels=_quintile_labels)
    )
# 'asset_price' is the product of the current price and next month's return;
# it feeds the *_values / Strategy_Values columns built further down.
msf['asset_price'] = msf['PRC'] * msf['next_Ret']
In [ ]:
# store required variables for portfolio construction
# (each model's signal column is appended to this list when its portfolios are sliced)
portfolio_labs = ['nextmonth', 'CUSIP', 'next_Ret', 'asset_price']
In [ ]:
msf.head()
Out[ ]:
date CUSIP PRC SHROUT RET nextmonth next_Ret atq dvpq seqq ... linear_fvp linear_sig lasso_fvp lasso_sig rf_fvp rf_sig linear_quintiles lasso_quintiles rf_quintiles asset_price
0 1987-03-31 00036110 33.50 9099.0 0.107438 1987-04-30 -0.111940 228.106 0.000 130.427 ... 456880.662951 0.498871 447478.401169 0.468026 291902.902475 -0.042365 Q3 Q2 Q2 -3.749990
1 1987-03-31 10304310 38.25 9158.0 0.145522 1987-04-30 -0.049020 163.403 0.000 112.860 ... 408503.015260 0.166174 472120.883363 0.347787 448585.047065 0.280598 Q2 Q2 Q3 -1.875015
2 1987-03-31 89051610 61.25 4418.0 -0.020349 1987-04-30 -0.089796 79.886 0.000 64.583 ... 275588.243491 0.018425 357723.212734 0.321951 276894.497616 0.023252 Q2 Q2 Q2 -5.500005
3 1987-03-31 89109210 31.00 6633.0 -0.038610 1987-04-30 -0.032258 261.411 0.296 74.113 ... 330098.489303 0.605358 374531.419583 0.821447 284665.347578 0.384404 Q3 Q3 Q4 -0.999998
4 1987-03-31 89190610 26.00 15916.0 -0.223881 1987-04-30 -0.028846 27.390 0.000 24.649 ... 146818.190213 -0.645209 256074.151786 -0.381188 161779.517648 -0.609054 Q1 Q1 Q1 -0.749996

5 rows × 45 columns

5.2 OLS Portfolio¶

In [ ]:
# slice the bottom (Q1) and top (Q5) OLS-signal quintiles to build portfolios
_linear_cols = portfolio_labs + ['linear_sig']
linear_q1 = msf.loc[msf['linear_quintiles'] == 'Q1', _linear_cols].copy()
linear_q5 = msf.loc[msf['linear_quintiles'] == 'Q5', _linear_cols].copy()
In [ ]:
# create signal-weighted returns and values: scale next month's return and
# the asset_price column by each stock's OLS signal
for _bucket in (linear_q1, linear_q5):
    _bucket['sig_ret'] = _bucket['linear_sig'] * _bucket['next_Ret']
for _bucket in (linear_q1, linear_q5):
    _bucket['sig_price'] = _bucket['linear_sig'] * _bucket['asset_price']
In [ ]:
# equal-weighted portfolio monthly returns: the plain cross-sectional mean of
# next-month return and asset_price within each quintile and month
lin_q1_rets = (
    linear_q1.groupby('nextmonth', as_index=False)[['next_Ret', 'asset_price']]
    .mean()
    .rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
)
lin_q5_rets = (
    linear_q5.groupby('nextmonth', as_index=False)[['next_Ret', 'asset_price']]
    .mean()
    .rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
)
# keep only months present in both quintiles, indexed by month
linear_eqw = pd.merge(lin_q1_rets, lin_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# long-short strategy: top quintile minus bottom quintile
linear_eqw['Strategy'] = linear_eqw['Q5_returns'] - linear_eqw['Q1_returns']
linear_eqw['Strategy_Values'] = linear_eqw['Q5_values'] - linear_eqw['Q1_values']
In [ ]:
# cumulative equal-weighted returns: compound the monthly returns and express
# the result as a net return
for _src, _dst in (
    ('Q1_returns', 'Q1_cumulative'),
    ('Q5_returns', 'Q5_cumulative'),
    ('Strategy', 'Strategy_Cumulative'),
):
    linear_eqw[_dst] = (1 + linear_eqw[_src]).cumprod() - 1
In [ ]:
linear_eqw.head()
Out[ ]:
Q1_returns Q1_values Q5_returns Q5_values Strategy Strategy_Values Q1_cumulative Q5_cumulative Strategy_Cumulative
nextmonth
1987-04-30 -0.020665 -0.623214 -0.023487 -0.195654 -0.002822 0.427560 -0.020665 -0.023487 -0.002822
1987-05-29 0.003433 0.102336 -0.003725 0.024674 -0.007159 -0.077662 -0.017302 -0.027125 -0.009961
1987-06-30 0.014769 0.724104 0.029162 0.326602 0.014393 -0.397502 -0.002789 0.001246 0.004288
1987-07-31 0.047469 1.677297 0.030508 0.383517 -0.016961 -1.293780 0.044548 0.031792 -0.012745
1987-08-31 0.022547 1.081732 0.020449 0.186669 -0.002098 -0.895063 0.068099 0.052892 -0.014816
In [ ]:
# signal-weighted monthly returns
# Both aggregated series use the signal-scaled columns: 'sig_ret' for the
# returns and 'sig_price' for the values. (Aggregating 'asset_price' here
# would simply reproduce the equal-weighted values.)
# NOTE(review): the mean of signal * return is not normalised by the sum of
# the signals — confirm this is the intended weighting scheme.
lin_q1_sig_rets = linear_q1.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q1_returns', 'sig_price': 'Q1_values'})
lin_q5_sig_rets = linear_q5.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q5_returns', 'sig_price': 'Q5_values'})
# keep only months present in both quintiles, indexed by month
linear_sgw = lin_q1_sig_rets.merge(lin_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
# long-short strategy: top quintile minus bottom quintile
linear_sgw['Strategy'] = linear_sgw['Q5_returns'] - linear_sgw['Q1_returns']
linear_sgw['Strategy_Values'] = linear_sgw['Q5_values'] - linear_sgw['Q1_values']
In [ ]:
# cumulative signal-weighted returns: compound the monthly returns and express
# the result as a net return
for _src, _dst in (
    ('Q1_returns', 'Q1_cumulative'),
    ('Q5_returns', 'Q5_cumulative'),
    ('Strategy', 'Strategy_Cumulative'),
):
    linear_sgw[_dst] = (1 + linear_sgw[_src]).cumprod() - 1
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
lin_eqw_fig = time_series(df=linear_eqw.iloc[:, 6:], ttl='OLS Equal-Weighted')
lin_eqw_fig.show(renderer='notebook')
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
lin_sgw_fig = time_series(df=linear_sgw.iloc[:, 6:], ttl='OLS Signal-Weighted')
lin_sgw_fig.show(renderer='notebook')

5.3 OLS Post-LASSO Portfolio¶

In [ ]:
# slice the bottom (Q1) and top (Q5) post-LASSO-signal quintiles
_lasso_cols = portfolio_labs + ['lasso_sig']
lasso_q1 = msf.loc[msf['lasso_quintiles'] == 'Q1', _lasso_cols].copy()
lasso_q5 = msf.loc[msf['lasso_quintiles'] == 'Q5', _lasso_cols].copy()
# scale next month's return and the asset_price column by each stock's signal
for _bucket in (lasso_q1, lasso_q5):
    _bucket['sig_ret'] = _bucket['next_Ret'] * _bucket['lasso_sig']
    _bucket['sig_price'] = _bucket['lasso_sig'] * _bucket['asset_price']
In [ ]:
# equal-weighted portfolio returns: plain cross-sectional means per month
lasso_q1_rets = lasso_q1.groupby('nextmonth', as_index=False).agg(
    {'next_Ret': 'mean', 'asset_price': 'mean'}
    ).rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
lasso_q5_rets = lasso_q5.groupby('nextmonth', as_index=False).agg(
    {'next_Ret': 'mean', 'asset_price': 'mean'}
    ).rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
lasso_rets = lasso_q1_rets.merge(lasso_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# long-short strategy: top quintile minus bottom quintile
lasso_rets['Strategy'] = lasso_rets['Q5_returns'] - lasso_rets['Q1_returns']
lasso_rets['Strategy_Values'] = lasso_rets['Q5_values'] - lasso_rets['Q1_values']
# signal-weighted portfolio returns
# Both aggregated series use the signal-scaled columns: 'sig_ret' for the
# returns and 'sig_price' for the values. (Aggregating 'asset_price' here
# would simply reproduce the equal-weighted values.)
# NOTE(review): the mean of signal * return is not normalised by the sum of
# the signals — confirm this is the intended weighting scheme.
lasso_q1_sig_rets = lasso_q1.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q1_returns', 'sig_price': 'Q1_values'})
lasso_q5_sig_rets = lasso_q5.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q5_returns', 'sig_price': 'Q5_values'})
lasso_sig_rets = lasso_q1_sig_rets.merge(lasso_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
lasso_sig_rets['Strategy'] = lasso_sig_rets['Q5_returns'] - lasso_sig_rets['Q1_returns']
lasso_sig_rets['Strategy_Values'] = lasso_sig_rets['Q5_values'] - lasso_sig_rets['Q1_values']
In [ ]:
# cumulative returns for the equal-weighted portfolio, then for the
# signal-weighted one: compound the monthly returns into a net return
for _frame in (lasso_rets, lasso_sig_rets):
    for _src, _dst in (
        ('Q1_returns', 'Q1_cumulative'),
        ('Q5_returns', 'Q5_cumulative'),
        ('Strategy', 'Strategy_Cumulative'),
    ):
        _frame[_dst] = (1 + _frame[_src]).cumprod() - 1
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
lasso_fig = time_series(df=lasso_rets.iloc[:, 6:], ttl='OLS Post-LASSO Equal-Weighted')
lasso_fig.show(renderer='notebook')
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
lasso_sig_fig = time_series(df=lasso_sig_rets.iloc[:, 6:], ttl='OLS Post-LASSO Signal-Weighted')
lasso_sig_fig.show(renderer='notebook')

5.4 Random Forest Portfolio¶

In [ ]:
# slice the bottom (Q1) and top (Q5) random-forest-signal quintiles
_rf_cols = portfolio_labs + ['rf_sig']
rf_q1 = msf.loc[msf['rf_quintiles'] == 'Q1', _rf_cols].copy()
rf_q5 = msf.loc[msf['rf_quintiles'] == 'Q5', _rf_cols].copy()
# scale next month's return and the asset_price column by each stock's signal
for _bucket in (rf_q1, rf_q5):
    _bucket['sig_ret'] = _bucket['next_Ret'] * _bucket['rf_sig']
    _bucket['sig_price'] = _bucket['rf_sig'] * _bucket['asset_price']
In [ ]:
# equal-weighted portfolio returns: plain cross-sectional means per month
rf_q1_rets = rf_q1.groupby('nextmonth', as_index=False).agg(
    {'next_Ret': 'mean', 'asset_price': 'mean'}
    ).rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
rf_q5_rets = rf_q5.groupby('nextmonth', as_index=False).agg(
    {'next_Ret': 'mean', 'asset_price': 'mean'}
    ).rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
rf_rets = rf_q1_rets.merge(rf_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# long-short strategy: top quintile minus bottom quintile
rf_rets['Strategy'] = rf_rets['Q5_returns'] - rf_rets['Q1_returns']
rf_rets['Strategy_Values'] = rf_rets['Q5_values'] - rf_rets['Q1_values']
# signal-weighted portfolio returns
# Both aggregated series use the signal-scaled columns: 'sig_ret' for the
# returns and 'sig_price' for the values. (Aggregating 'asset_price' here
# would simply reproduce the equal-weighted values.)
# NOTE(review): the mean of signal * return is not normalised by the sum of
# the signals — confirm this is the intended weighting scheme.
rf_q1_sig_rets = rf_q1.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q1_returns', 'sig_price': 'Q1_values'})
rf_q5_sig_rets = rf_q5.groupby('nextmonth', as_index=False).agg(
    {'sig_ret': 'mean', 'sig_price': 'mean'}
    ).rename(columns={'sig_ret': 'Q5_returns', 'sig_price': 'Q5_values'})
rf_sig_rets = rf_q1_sig_rets.merge(rf_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
rf_sig_rets['Strategy'] = rf_sig_rets['Q5_returns'] - rf_sig_rets['Q1_returns']
rf_sig_rets['Strategy_Values'] = rf_sig_rets['Q5_values'] - rf_sig_rets['Q1_values']
In [ ]:
# cumulative returns for the equal-weighted portfolio, then for the
# signal-weighted one: compound the monthly returns into a net return
for _frame in (rf_rets, rf_sig_rets):
    for _src, _dst in (
        ('Q1_returns', 'Q1_cumulative'),
        ('Q5_returns', 'Q5_cumulative'),
        ('Strategy', 'Strategy_Cumulative'),
    ):
        _frame[_dst] = (1 + _frame[_src]).cumprod() - 1
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
rf_rets_fig = time_series(df=rf_rets.iloc[:, 6:], ttl='Random Forest Equal-Weighted')
rf_rets_fig.show(renderer='notebook')
In [ ]:
# plot the columns from position 6 onward (the three cumulative series)
rf_sig_fig = time_series(df=rf_sig_rets.iloc[:, 6:], ttl='Random Forest Signal-Weighted')
rf_sig_fig.show(renderer='notebook')

6. Performance¶

In [ ]:
# inputs for comparing the strategy return distributions: one array of
# monthly long-short returns per model, in the order OLS, post-LASSO, RF
eqw_rets = [frame['Strategy'].values for frame in (linear_eqw, lasso_rets, rf_rets)]
sgw_rets = [frame['Strategy'].values for frame in (linear_sgw, lasso_sig_rets, rf_sig_rets)]
# legend labels and colours, in the same model order
ret_labels = [
    'OLS Strategy Returns',
    'OLS Post-LASSO Strategy Returns',
    'Random Forest Strategy Returns',
]
colors = ['#A569BD', '#F1C40F', '#273746']
In [ ]:
# column names shared by both frames, one per model
_strategy_cols = ['linear_strategy', 'lasso_strategy', 'rf_strategy']
# equal-weighted returns: one column of monthly strategy returns per model
df_rets = pd.concat(
    [linear_eqw['Strategy'], lasso_rets['Strategy'], rf_rets['Strategy']],
    axis=1,
    keys=_strategy_cols,
)
# signal-weighted returns: same layout for the signal-weighted portfolios
df_sig_rets = pd.concat(
    [linear_sgw['Strategy'], lasso_sig_rets['Strategy'], rf_sig_rets['Strategy']],
    axis=1,
    keys=_strategy_cols,
)

6.1 Helper Functions¶

In [ ]:
def get_stats(df: pd.DataFrame):
    """Summarise each column of ``df``.

    Returns the ``describe()`` table of ``df`` with two extra rows appended,
    'kurtosis' and 'skewness', in that order. ``df`` itself is not modified.
    """
    summary = df.describe()
    for row_label, row_values in (('kurtosis', df.kurt()), ('skewness', df.skew())):
        summary.loc[row_label] = row_values
    return summary
In [ ]:
def get_bar(xval: pd.Series, yval: pd.Series, n: str):
    """Build a Plotly bar trace.

    Parameters
    ----------
    xval : categories for the x axis.
    yval : bar heights.
    n : legend name of the trace.

    Returns
    -------
    The ``go.Bar`` trace (not a figure), ready to be added to a figure.
    """
    return go.Bar(x=xval, y=yval, name=n)
In [ ]:
def plot_stats(df: pd.DataFrame, ttl: str):
    """Draw a grouped bar chart with one bar series per column of ``df``.

    Parameters
    ----------
    df : frame whose index supplies the x categories and whose columns each
        become one bar series (the column name is the legend name).
    ttl : figure title.

    Returns
    -------
    The assembled ``go.Figure``.
    """
    fig = go.Figure()
    for col in df.columns:
        fig.add_trace(get_bar(xval=df.index, yval=df[col], n=col))
    fig.update_layout(
        title=ttl,
        xaxis_title='Metric',
        yaxis_title='Value',
        barmode='group',
    )
    return fig
In [ ]:
def get_sr(stats_df: pd.DataFrame, rf: float):
    """Compute a Sharpe ratio per column from a statistics table.

    Parameters
    ----------
    stats_df : table with at least the rows 'mean' and 'std'.
    rf : risk-free rate subtracted from the mean; it must be expressed over
        the same period as the returns behind ``stats_df``.

    Returns
    -------
    Series of ``(mean - rf) / std``, one entry per column of ``stats_df``.
    """
    excess_mean = stats_df.loc['mean'] - rf
    volatility = stats_df.loc['std']
    return excess_mean / volatility
In [ ]:
def get_MDD(df: pd.DataFrame, col: str, window : int = 12, min_periods : int = 1):
    """Rolling maximum drawdown of one column.

    As taken from https://quant.stackexchange.com/questions/18094/how-can-i-calculate-the-maximum-drawdown-mdd-in-python

    Parameters
    ----------
    df : frame holding the series.
    col : name of the column to analyse.
    window : number of observations in each rolling window.
    min_periods : minimum number of observations a window needs before a
        value is produced; windows with fewer yield NaN. The default of 1
        lets the first observations use an expanding window.

    Returns
    -------
    Series, indexed like ``df``, with the lowest drawdown observed within
    each rolling window.
    """
    series = df[col]
    # highest value seen within the trailing window
    rolling_peak = series.rolling(window, min_periods=min_periods).max()
    # drawdown relative to that peak (0 at the peak, negative below it)
    drawdown = series / rolling_peak - 1.0
    # worst drawdown within the trailing window
    return drawdown.rolling(window, min_periods=min_periods).min()

6.2 Return Distribution¶

In [ ]:
# distribution of the equal-weighted strategy returns, one curve per model
eqw_distrib = plot_distribution(
    data=eqw_rets,
    labels=ret_labels,
    colors=colors,
    title='Equal-Weighted Portfolios Strategy Returns',
)
eqw_distrib.show(renderer='notebook')
In [ ]:
# distribution of the signal-weighted strategy returns, one curve per model
sgw_distrib = plot_distribution(
    data=sgw_rets,
    labels=ret_labels,
    colors=colors,
    title='Signal-Weighted Portfolios Strategy Returns',
)
sgw_distrib.show(renderer='notebook')

6.3 Descriptive Statistics¶

In [ ]:
# equal-weighted statistics: describe() summary plus kurtosis and skewness
# of each model's monthly strategy returns
eqw_stats = get_stats(df_rets)
# bare expression so the notebook renders the table
eqw_stats
Out[ ]:
linear_strategy lasso_strategy rf_strategy
count 310.000000 310.000000 310.000000
mean 0.007734 0.005971 0.007340
std 0.031684 0.045262 0.034602
min -0.178231 -0.331893 -0.158035
25% -0.009192 -0.011552 -0.010823
50% 0.006142 0.006095 0.006415
75% 0.022721 0.021554 0.024474
max 0.158573 0.261344 0.190178
kurtosis 7.584601 16.988515 5.997929
skewness 0.152737 -0.789510 0.252297
In [ ]:
# signal-weighted statistics: describe() summary plus kurtosis and skewness
# of each model's monthly strategy returns
sgw_stats = get_stats(df_sig_rets)
# bare expression so the notebook renders the table
sgw_stats
Out[ ]:
linear_strategy lasso_strategy rf_strategy
count 310.000000 310.000000 310.000000
mean 0.081437 0.088887 0.022604
std 0.422306 0.487088 0.162808
min -1.397730 -2.934913 -0.542772
25% -0.110666 -0.142098 -0.048055
50% 0.084073 0.112983 0.030816
75% 0.275032 0.348066 0.093782
max 1.689902 2.369476 0.742762
kurtosis 2.424560 6.540261 3.064944
skewness -0.097280 -0.787546 -0.206759
In [ ]:
# store stats to be plotted: row labels of the statistics table
# ('50%' is the median row produced by describe())
stats_labs = ['mean', 'std', '50%']
In [ ]:
# bar chart of the selected equal-weighted statistics (rows listed in stats_labs)
_plotted_rows = eqw_stats.index.isin(stats_labs)
eqw_stats_fig = plot_stats(eqw_stats.loc[_plotted_rows], 'Portfolio Descriptive Statistics')
eqw_stats_fig.show(renderer='notebook')

6.4 Sharpe Ratio¶

In [ ]:
# Sharpe ratio of each equal-weighted strategy, shown as a bar chart.
# NOTE(review): the mean/std come from monthly returns while rf=0.0416 looks
# like an annual rate — confirm the units match.
_eq_sr = get_sr(eqw_stats, rf=0.0416)
eq_sharpe = pd.DataFrame(_eq_sr, columns=['Sharpe_Ratio'])
eqs_fig = plot_stats(eq_sharpe, 'Equal-Weighted Sharpe Ratios')
eqs_fig.show(renderer='notebook')
In [ ]:
# Sharpe ratio of each signal-weighted strategy, shown as a bar chart.
# NOTE(review): the mean/std come from monthly returns while rf=0.0416 looks
# like an annual rate — confirm the units match.
_sgw_sr = get_sr(sgw_stats, rf=0.0416)
sgw_sharpe = pd.DataFrame(_sgw_sr, columns=['Sharpe_Ratio'])
sgs_fig = plot_stats(sgw_sharpe, 'Signal Weighted Sharpe Ratios')
sgs_fig.show(renderer='notebook')

6.5 Max Drawdown¶

In [ ]:
# rolling max drawdown of the Strategy_Values series of each equal-weighted
# portfolio, collected in one frame indexed like linear_eqw
eqw_mdd = pd.DataFrame(index=linear_eqw.index)
for _label, _frame in (
    ('linear_MDD', linear_eqw),
    ('lasso_MDD', lasso_rets),
    ('rf_MDD', rf_rets),
):
    eqw_mdd[_label] = get_MDD(_frame, 'Strategy_Values')
In [ ]:
# line chart of the equal-weighted drawdown series, one line per model
eqw_mdd_fig = time_series(df=eqw_mdd, ttl='Equal Weighted Max Drawdown')
eqw_mdd_fig.show(renderer='notebook')
In [ ]:
# rolling max drawdown of the Strategy_Values series of each signal-weighted
# portfolio, collected in one frame indexed like linear_sgw
sgw_mdd = pd.DataFrame(index=linear_sgw.index)
for _label, _frame in (
    ('linear_MDD', linear_sgw),
    ('lasso_MDD', lasso_sig_rets),
    ('rf_MDD', rf_sig_rets),
):
    sgw_mdd[_label] = get_MDD(_frame, 'Strategy_Values')
In [ ]:
# line chart of the signal-weighted drawdown series, one line per model
sgw_mdd_fig = time_series(df=sgw_mdd, ttl='Signal Weighted Max Drawdown')
sgw_mdd_fig.show(renderer='notebook')